There appears to be a significant difference in the rate of bad variable naming between the low and high debt groups.
# Box plot of `var_names_new_good.ratio`, one box per `high_debt_version`.
ggplot(
  data = d.both_completed,
  mapping = aes(x = var_names_new_good.ratio, fill = high_debt_version)
) +
  geom_boxplot() +
  # breaks = NULL removes the breaks from the y axis.
  scale_y_continuous(breaks = NULL) +
  scale_fill_manual(
    values = c("#7070FF", "lightblue"),
    labels = c("High debt", "Low debt"),
    name = "Debt level",
    # Legend entries are listed in reverse order.
    guide = guide_legend(reverse = TRUE)
  ) +
  labs(
    x = "rate of good variable name selection",
    title = "Distribution of good variable naming rate for the different debt levels (new variables)"
  )
### Copied Variable Names
# Box plot of `var_names_copied_good.ratio`, one box per `high_debt_version`.
ggplot(
  data = d.both_completed,
  mapping = aes(x = var_names_copied_good.ratio, fill = high_debt_version)
) +
  geom_boxplot() +
  # breaks = NULL removes the breaks from the y axis.
  scale_y_continuous(breaks = NULL) +
  scale_fill_manual(
    values = c("#7070FF", "lightblue"),
    labels = c("High debt", "Low debt"),
    name = "Debt level",
    # Legend entries are listed in reverse order.
    guide = guide_legend(reverse = TRUE)
  ) +
  labs(
    x = "rate of good variable name selection",
    title = "Distribution of good variable naming rate for the different debt levels (copied variables)"
  )
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
# Min / quartiles / median / mean / max of `var_names_new_good.ratio`.
summary(pull(d.both_completed, var_names_new_good.ratio))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 1.0000 1.0000 0.8425 1.0000 1.0000
# Variance of `var_names_new_good.ratio`, formatted to two decimals.
d.both_completed %>%
  pull(var_names_new_good.ratio) %>%
  var() %>%
  sprintf("Variance: %.2f", .)
## [1] "Variance: 0.10"
Variable names are modeled using the binomial family, where the number of trials is the total number of new variables.
We include high_debt_version as well as a varying intercept for each individual in our initial model.
We iterate over the model until we have sane priors; in this case, a prior giving a 50/50 chance was chosen in both cases. The prior “lkj(2)” will mean the model is sceptical of strong correlations.
# Build the model factory for the variable-naming analysis.
# NOTE(review): `extendable_model` is defined outside this section; judging by
# the calls further down, the returned function presumably fits the base
# formula plus any extra predictors passed to it - confirm against the helper.
variable_names.with <- extendable_model(
base_name = "variable_names",
# Good new variable names out of all new variable names, explained by the
# debt version with a varying intercept per session.
base_formula = "var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + (1 | session)",
base_priors = c(
prior(normal(0, 1), class = "b"),
# The binomial family uses a logit link, so this prior is on the log-odds
# scale: a mean of 2 corresponds to roughly an 88% rate of good names.
prior(normal(2, 1), class = "Intercept"),
prior(exponential(1), class = "sd")
),
family = binomial(),
data = d.both_completed,
base_control = list(adapt_delta = 0.95)
)
# List the priors of the base model.
# NOTE(review): this call passes `only_priors` while the next two pass
# `sample_prior = "only"`; the helper's signature is not visible here, so
# confirm which argument it actually accepts and whether both calls are needed.
prior_summary(variable_names.with(only_priors= TRUE))
prior_summary(variable_names.with(sample_prior = "only"))
# Predictive check of the `sample_prior = "only"` model using 200 samples
# (presumably a prior predictive check - confirm against the helper).
pp_check(variable_names.with(sample_prior = "only"), nsamples = 200)
Is this really reasonable at all?
# Simulate how much the prior on the `high_debt_version` effect can move the
# outcome, using the same priors as the model: normal(2, 1) for the intercept
# and normal(0, 1) for the effect.
#
# The model is binomial with a logit link, so the linear predictor is mapped
# back with the inverse logit (`plogis`). `exp` is the inverse of a log link
# and would give a difference in odds, not in the modelled outcome. The
# simulated quantity is the prior-implied change in the probability of
# choosing a good variable name, which always lies in [-1, 1].
sim.size <- 1000
sim.intercept <- rnorm(sim.size, 2, 1)
sim.beta <- rnorm(sim.size, 0, 1)
sim.beta.diff <- plogis(sim.intercept + sim.beta) - plogis(sim.intercept)

data.frame(x = sim.beta.diff) %>%
  ggplot(aes(x)) +
  geom_density() +
  # A difference between two probabilities cannot leave [-1, 1].
  xlim(-1, 1) +
  labs(
    title = "Beta parameter prior influence",
    x = "Change in probability of a good variable name",
    y = "Density"
  )
We check the posterior distribution and can see that the model seems to have been able to fit the data well. Sampling also seems to have worked well, as Rhat values are close to 1 and the sampling plots look nice.

#### Posterior Predictive check
# Posterior predictive check of the base model, drawn as bars from 200 samples.
pp_check(variable_names.with(), nsamples = 200, type = "bars")
#### Summary
# Print the base model's estimates together with Rhat and ESS diagnostics.
summary(variable_names.with())
## Family: binomial
## Links: mu = logit
## Formula: var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + (1 | session)
## Data: as.data.frame(data) (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.73 0.60 0.73 3.13 1.00 1012 1849
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 1.49 0.50 0.62 2.59 1.00 2055
## high_debt_versionfalse 2.40 0.59 1.28 3.61 1.00 3646
## Tail_ESS
## Intercept 2358
## high_debt_versionfalse 2980
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Draw the base model's default plots; ask = FALSE avoids prompting between pages.
plot(variable_names.with(), ask = FALSE)
# Default prior for the monotonic predictor: dirichlet(2) on the simplex
# parameter of mo(education_level).
edlvl_prior <- prior(dirichlet(2), class = "simo", coef = "moeducation_level1")
# Compare the base model against every single-predictor extension.
# The labels in the printed comparison are the call expressions written here,
# so the arguments are kept exactly as they should appear in the output.
loo_result <- loo(
# Benchmark model(s)
variable_names.with(),
# New model(s)
variable_names.with("work_domain"),
variable_names.with("work_experience_programming.s"),
variable_names.with("work_experience_java.s"),
variable_names.with("education_field"),
variable_names.with("mo(education_level)", edlvl_prior),
variable_names.with("workplace_peer_review"),
variable_names.with("workplace_td_tracking"),
variable_names.with("workplace_pair_programming"),
variable_names.with("workplace_coding_standards"),
variable_names.with("scenario"),
variable_names.with("group")
)
# Show the comparison table (elpd_diff and se_diff per model).
loo_result[2]
## $diffs
## elpd_diff se_diff
## variable_names.with("scenario") 0.0 0.0
## variable_names.with("workplace_td_tracking") -0.4 1.3
## variable_names.with("education_field") -0.4 1.5
## variable_names.with() -0.5 1.2
## variable_names.with("work_experience_java.s") -0.7 1.1
## variable_names.with("workplace_pair_programming") -0.8 1.4
## variable_names.with("group") -0.9 1.4
## variable_names.with("work_domain") -1.1 1.3
## variable_names.with("workplace_peer_review") -1.1 1.1
## variable_names.with("workplace_coding_standards") -1.2 1.4
## variable_names.with("work_experience_programming.s") -1.3 1.1
## variable_names.with("mo(education_level)", edlvl_prior) -1.8 1.5
# Show the per-model loo estimates and Pareto k diagnostics.
loo_result[1]
## $loos
## $loos$`variable_names.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.3 6.1
## p_loo 14.6 2.8
## looic 72.6 12.1
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 21 47.7% 1443
## (0.5, 0.7] (ok) 15 34.1% 165
## (0.7, 1] (bad) 8 18.2% 81
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with("work_domain")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.9 6.3
## p_loo 15.9 3.0
## looic 73.8 12.5
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 21 47.7% 1200
## (0.5, 0.7] (ok) 9 20.5% 840
## (0.7, 1] (bad) 14 31.8% 23
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with("work_experience_programming.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -37.1 6.2
## p_loo 15.7 2.9
## looic 74.2 12.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 22 50.0% 1216
## (0.5, 0.7] (ok) 11 25.0% 192
## (0.7, 1] (bad) 10 22.7% 34
## (1, Inf) (very bad) 1 2.3% 20
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with("work_experience_java.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.5 6.1
## p_loo 15.3 2.8
## looic 73.0 12.2
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 18 40.9% 1523
## (0.5, 0.7] (ok) 16 36.4% 226
## (0.7, 1] (bad) 9 20.5% 32
## (1, Inf) (very bad) 1 2.3% 76
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with("education_field")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.2 6.0
## p_loo 13.2 2.5
## looic 72.4 12.0
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 24 54.5% 582
## (0.5, 0.7] (ok) 12 27.3% 210
## (0.7, 1] (bad) 8 18.2% 65
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with("mo(education_level)", edlvl_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -37.5 6.5
## p_loo 16.1 3.2
## looic 75.1 12.9
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 20 45.5% 581
## (0.5, 0.7] (ok) 12 27.3% 122
## (0.7, 1] (bad) 11 25.0% 35
## (1, Inf) (very bad) 1 2.3% 25
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with("workplace_peer_review")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.9 6.2
## p_loo 15.5 2.9
## looic 73.8 12.5
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 17 38.6% 1625
## (0.5, 0.7] (ok) 15 34.1% 201
## (0.7, 1] (bad) 12 27.3% 20
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with("workplace_td_tracking")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.1 6.1
## p_loo 14.8 2.8
## looic 72.3 12.2
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 21 47.7% 1060
## (0.5, 0.7] (ok) 13 29.5% 226
## (0.7, 1] (bad) 9 20.5% 38
## (1, Inf) (very bad) 1 2.3% 54
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with("workplace_pair_programming")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.5 6.2
## p_loo 15.2 2.9
## looic 73.1 12.3
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 18 40.9% 1484
## (0.5, 0.7] (ok) 15 34.1% 251
## (0.7, 1] (bad) 11 25.0% 68
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with("workplace_coding_standards")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -37.0 6.3
## p_loo 15.7 3.1
## looic 74.0 12.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 25 56.8% 1315
## (0.5, 0.7] (ok) 10 22.7% 105
## (0.7, 1] (bad) 8 18.2% 45
## (1, Inf) (very bad) 1 2.3% 10
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with("scenario")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -35.8 5.8
## p_loo 14.7 2.6
## looic 71.5 11.7
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 20 45.5% 1434
## (0.5, 0.7] (ok) 13 29.5% 243
## (0.7, 1] (bad) 11 25.0% 47
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with("group")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.7 6.2
## p_loo 15.6 3.0
## looic 73.3 12.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 22 50.0% 2053
## (0.5, 0.7] (ok) 12 27.3% 168
## (0.7, 1] (bad) 9 20.5% 57
## (1, Inf) (very bad) 1 2.3% 33
## See help('pareto-k-diagnostic') for details.
# Second comparison round: single-predictor models as benchmarks against
# models that combine `scenario` with one further predictor.
# The labels in the printed comparison are the call expressions written here.
loo_result <- loo(
# Benchmark model(s)
variable_names.with(),
variable_names.with("work_experience_programming.s"),
variable_names.with("work_experience_java.s"),
variable_names.with("workplace_peer_review"),
variable_names.with("workplace_td_tracking"),
variable_names.with("workplace_coding_standards"),
# New model(s)
variable_names.with(c("scenario","workplace_td_tracking")),
variable_names.with(c("scenario","workplace_peer_review")),
variable_names.with(c("scenario","work_experience_java.s"))
)
# Show the comparison table (elpd_diff and se_diff per model).
loo_result[2]
## $diffs
## elpd_diff se_diff
## variable_names.with(c("scenario", "workplace_peer_review")) 0.0 0.0
## variable_names.with("workplace_td_tracking") -0.1 1.5
## variable_names.with() -0.3 1.4
## variable_names.with("work_experience_java.s") -0.5 1.3
## variable_names.with(c("scenario", "work_experience_java.s")) -0.6 0.6
## variable_names.with(c("scenario", "workplace_td_tracking")) -0.7 0.8
## variable_names.with("workplace_peer_review") -0.9 1.2
## variable_names.with("workplace_coding_standards") -1.0 1.4
## variable_names.with("work_experience_programming.s") -1.1 1.2
# Show the per-model loo estimates and Pareto k diagnostics.
loo_result[1]
## $loos
## $loos$`variable_names.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.3 6.1
## p_loo 14.6 2.8
## looic 72.6 12.1
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 21 47.7% 1443
## (0.5, 0.7] (ok) 15 34.1% 165
## (0.7, 1] (bad) 8 18.2% 81
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with("work_experience_programming.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -37.1 6.2
## p_loo 15.7 2.9
## looic 74.2 12.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 22 50.0% 1216
## (0.5, 0.7] (ok) 11 25.0% 192
## (0.7, 1] (bad) 10 22.7% 34
## (1, Inf) (very bad) 1 2.3% 20
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with("work_experience_java.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.5 6.1
## p_loo 15.3 2.8
## looic 73.0 12.2
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 18 40.9% 1523
## (0.5, 0.7] (ok) 16 36.4% 226
## (0.7, 1] (bad) 9 20.5% 32
## (1, Inf) (very bad) 1 2.3% 76
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with("workplace_peer_review")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.9 6.2
## p_loo 15.5 2.9
## looic 73.8 12.5
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 17 38.6% 1625
## (0.5, 0.7] (ok) 15 34.1% 201
## (0.7, 1] (bad) 12 27.3% 20
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with("workplace_td_tracking")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.1 6.1
## p_loo 14.8 2.8
## looic 72.3 12.2
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 21 47.7% 1060
## (0.5, 0.7] (ok) 13 29.5% 226
## (0.7, 1] (bad) 9 20.5% 38
## (1, Inf) (very bad) 1 2.3% 54
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with("workplace_coding_standards")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -37.0 6.3
## p_loo 15.7 3.1
## looic 74.0 12.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 25 56.8% 1315
## (0.5, 0.7] (ok) 10 22.7% 105
## (0.7, 1] (bad) 8 18.2% 45
## (1, Inf) (very bad) 1 2.3% 10
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with(c("scenario", "workplace_td_tracking"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.7 6.0
## p_loo 15.7 2.8
## looic 73.3 12.0
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 22 50.0% 889
## (0.5, 0.7] (ok) 6 13.6% 535
## (0.7, 1] (bad) 16 36.4% 37
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with(c("scenario", "workplace_peer_review"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.0 6.0
## p_loo 15.2 2.8
## looic 72.0 11.9
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 18 40.9% 1509
## (0.5, 0.7] (ok) 16 36.4% 177
## (0.7, 1] (bad) 10 22.7% 30
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with(c("scenario", "work_experience_java.s"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.6 6.0
## p_loo 15.8 2.9
## looic 73.1 12.0
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 17 38.6% 1592
## (0.5, 0.7] (ok) 14 31.8% 775
## (0.7, 1] (bad) 13 29.5% 58
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
# Third comparison round: every model from the previous round serves as a
# benchmark for two larger predictor combinations.
#
# The result is assigned to `loo_result` so that the `loo_result[2]` and
# `loo_result[1]` inspections that follow report this comparison. Without the
# assignment they would print the previous round again, which does not
# contain the new models.
# The labels in the printed comparison are the call expressions written here.
loo_result <- loo(
  # Benchmark model(s)
  variable_names.with(),
  variable_names.with("work_experience_programming.s"),
  variable_names.with("work_experience_java.s"),
  variable_names.with("workplace_peer_review"),
  variable_names.with("workplace_td_tracking"),
  variable_names.with("workplace_coding_standards"),
  variable_names.with(c("scenario", "workplace_td_tracking")),
  variable_names.with(c("scenario", "workplace_peer_review")),
  variable_names.with(c("scenario", "work_experience_java.s")),
  # New model(s)
  variable_names.with(c("scenario", "work_experience_java.s", "workplace_td_tracking", "workplace_peer_review")),
  variable_names.with(c("scenario", "work_experience_java.s", "workplace_td_tracking"))
)
## Output of model 'variable_names.with()':
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.3 6.1
## p_loo 14.6 2.8
## looic 72.6 12.1
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 21 47.7% 1443
## (0.5, 0.7] (ok) 15 34.1% 165
## (0.7, 1] (bad) 8 18.2% 81
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'variable_names.with("work_experience_programming.s")':
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -37.1 6.2
## p_loo 15.7 2.9
## looic 74.2 12.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 22 50.0% 1216
## (0.5, 0.7] (ok) 11 25.0% 192
## (0.7, 1] (bad) 10 22.7% 34
## (1, Inf) (very bad) 1 2.3% 20
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'variable_names.with("work_experience_java.s")':
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.5 6.1
## p_loo 15.3 2.8
## looic 73.0 12.2
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 18 40.9% 1523
## (0.5, 0.7] (ok) 16 36.4% 226
## (0.7, 1] (bad) 9 20.5% 32
## (1, Inf) (very bad) 1 2.3% 76
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'variable_names.with("workplace_peer_review")':
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.9 6.2
## p_loo 15.5 2.9
## looic 73.8 12.5
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 17 38.6% 1625
## (0.5, 0.7] (ok) 15 34.1% 201
## (0.7, 1] (bad) 12 27.3% 20
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'variable_names.with("workplace_td_tracking")':
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.1 6.1
## p_loo 14.8 2.8
## looic 72.3 12.2
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 21 47.7% 1060
## (0.5, 0.7] (ok) 13 29.5% 226
## (0.7, 1] (bad) 9 20.5% 38
## (1, Inf) (very bad) 1 2.3% 54
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'variable_names.with("workplace_coding_standards")':
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -37.0 6.3
## p_loo 15.7 3.1
## looic 74.0 12.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 25 56.8% 1315
## (0.5, 0.7] (ok) 10 22.7% 105
## (0.7, 1] (bad) 8 18.2% 45
## (1, Inf) (very bad) 1 2.3% 10
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'variable_names.with(c("scenario", "workplace_td_tracking"))':
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.7 6.0
## p_loo 15.7 2.8
## looic 73.3 12.0
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 22 50.0% 889
## (0.5, 0.7] (ok) 6 13.6% 535
## (0.7, 1] (bad) 16 36.4% 37
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'variable_names.with(c("scenario", "workplace_peer_review"))':
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.0 6.0
## p_loo 15.2 2.8
## looic 72.0 11.9
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 18 40.9% 1509
## (0.5, 0.7] (ok) 16 36.4% 177
## (0.7, 1] (bad) 10 22.7% 30
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'variable_names.with(c("scenario", "work_experience_java.s"))':
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.6 6.0
## p_loo 15.8 2.9
## looic 73.1 12.0
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 17 38.6% 1592
## (0.5, 0.7] (ok) 14 31.8% 775
## (0.7, 1] (bad) 13 29.5% 58
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'variable_names.with(c("scenario", "work_experience_java.s", "workplace_td_tracking", "workplace_peer_review"))':
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.9 6.2
## p_loo 16.5 3.1
## looic 73.7 12.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 22 50.0% 764
## (0.5, 0.7] (ok) 9 20.5% 212
## (0.7, 1] (bad) 11 25.0% 25
## (1, Inf) (very bad) 2 4.5% 20
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'variable_names.with(c("scenario", "work_experience_java.s", "workplace_td_tracking"))':
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.7 6.1
## p_loo 16.0 2.9
## looic 73.4 12.1
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 23 52.3% 891
## (0.5, 0.7] (ok) 11 25.0% 319
## (0.7, 1] (bad) 9 20.5% 38
## (1, Inf) (very bad) 1 2.3% 18
## See help('pareto-k-diagnostic') for details.
##
## Model comparisons:
## elpd_diff
## variable_names.with(c("scenario", "workplace_peer_review")) 0.0
## variable_names.with("workplace_td_tracking") -0.1
## variable_names.with() -0.3
## variable_names.with("work_experience_java.s") -0.5
## variable_names.with(c("scenario", "work_experience_java.s")) -0.6
## variable_names.with(c("scenario", "workplace_td_tracking")) -0.7
## variable_names.with(c("scenario", "work_experience_java.s", "workplace_td_tracking")) -0.7
## variable_names.with(c("scenario", "work_experience_java.s", "workplace_td_tracking", "workplace_peer_review")) -0.9
## variable_names.with("workplace_peer_review") -0.9
## variable_names.with("workplace_coding_standards") -1.0
## variable_names.with("work_experience_programming.s") -1.1
## se_diff
## variable_names.with(c("scenario", "workplace_peer_review")) 0.0
## variable_names.with("workplace_td_tracking") 1.5
## variable_names.with() 1.4
## variable_names.with("work_experience_java.s") 1.3
## variable_names.with(c("scenario", "work_experience_java.s")) 0.6
## variable_names.with(c("scenario", "workplace_td_tracking")) 0.8
## variable_names.with(c("scenario", "work_experience_java.s", "workplace_td_tracking")) 0.8
## variable_names.with(c("scenario", "work_experience_java.s", "workplace_td_tracking", "workplace_peer_review")) 0.8
## variable_names.with("workplace_peer_review") 1.2
## variable_names.with("workplace_coding_standards") 1.4
## variable_names.with("work_experience_programming.s") 1.2
# Show the comparison table held in `loo_result` (elpd_diff and se_diff per model).
loo_result[2]
## $diffs
## elpd_diff se_diff
## variable_names.with(c("scenario", "workplace_peer_review")) 0.0 0.0
## variable_names.with("workplace_td_tracking") -0.1 1.5
## variable_names.with() -0.3 1.4
## variable_names.with("work_experience_java.s") -0.5 1.3
## variable_names.with(c("scenario", "work_experience_java.s")) -0.6 0.6
## variable_names.with(c("scenario", "workplace_td_tracking")) -0.7 0.8
## variable_names.with("workplace_peer_review") -0.9 1.2
## variable_names.with("workplace_coding_standards") -1.0 1.4
## variable_names.with("work_experience_programming.s") -1.1 1.2
# Show the per-model loo estimates and Pareto k diagnostics held in `loo_result`.
loo_result[1]
## $loos
## $loos$`variable_names.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.3 6.1
## p_loo 14.6 2.8
## looic 72.6 12.1
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 21 47.7% 1443
## (0.5, 0.7] (ok) 15 34.1% 165
## (0.7, 1] (bad) 8 18.2% 81
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with("work_experience_programming.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -37.1 6.2
## p_loo 15.7 2.9
## looic 74.2 12.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 22 50.0% 1216
## (0.5, 0.7] (ok) 11 25.0% 192
## (0.7, 1] (bad) 10 22.7% 34
## (1, Inf) (very bad) 1 2.3% 20
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with("work_experience_java.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.5 6.1
## p_loo 15.3 2.8
## looic 73.0 12.2
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 18 40.9% 1523
## (0.5, 0.7] (ok) 16 36.4% 226
## (0.7, 1] (bad) 9 20.5% 32
## (1, Inf) (very bad) 1 2.3% 76
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with("workplace_peer_review")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.9 6.2
## p_loo 15.5 2.9
## looic 73.8 12.5
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 17 38.6% 1625
## (0.5, 0.7] (ok) 15 34.1% 201
## (0.7, 1] (bad) 12 27.3% 20
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with("workplace_td_tracking")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.1 6.1
## p_loo 14.8 2.8
## looic 72.3 12.2
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 21 47.7% 1060
## (0.5, 0.7] (ok) 13 29.5% 226
## (0.7, 1] (bad) 9 20.5% 38
## (1, Inf) (very bad) 1 2.3% 54
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with("workplace_coding_standards")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -37.0 6.3
## p_loo 15.7 3.1
## looic 74.0 12.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 25 56.8% 1315
## (0.5, 0.7] (ok) 10 22.7% 105
## (0.7, 1] (bad) 8 18.2% 45
## (1, Inf) (very bad) 1 2.3% 10
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with(c("scenario", "workplace_td_tracking"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.7 6.0
## p_loo 15.7 2.8
## looic 73.3 12.0
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 22 50.0% 889
## (0.5, 0.7] (ok) 6 13.6% 535
## (0.7, 1] (bad) 16 36.4% 37
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with(c("scenario", "workplace_peer_review"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.0 6.0
## p_loo 15.2 2.8
## looic 72.0 11.9
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 18 40.9% 1509
## (0.5, 0.7] (ok) 16 36.4% 177
## (0.7, 1] (bad) 10 22.7% 30
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`variable_names.with(c("scenario", "work_experience_java.s"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -36.6 6.0
## p_loo 15.8 2.9
## looic 73.1 12.0
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 17 38.6% 1592
## (0.5, 0.7] (ok) 14 31.8% 775
## (0.7, 1] (bad) 13 29.5% 58
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
We pick some of our top performing models as candidates and inspect them closer.
The candidate models are named and listed in order of complexity.
We select the simplest model as a baseline.
# Candidate 0: debt version as the only population-level predictor, with a
# varying intercept per session. The fit is stored under fits/ and, with
# file_refit = "on_change", is refitted only when the model or data change.
variable_names0 <- brm(
  formula = "var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + (1 | session)",
  data = d.both_completed,
  family = binomial(),
  prior = c(
    prior(normal(0, 1), class = "b"),
    prior(normal(2, 1), class = "Intercept"),
    prior(exponential(1), class = "sd")
  ),
  control = list(adapt_delta = 0.95),
  # Fixed seed so the sampling is reproducible.
  seed = 20210421,
  file = "fits/variable_names0",
  file_refit = "on_change"
)
# Print the estimates together with Rhat and ESS diagnostics.
summary(variable_names0)
## Family: binomial
## Links: mu = logit
## Formula: var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + (1 | session)
## Data: d.both_completed (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.73 0.60 0.73 3.13 1.00 1012 1849
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 1.49 0.50 0.62 2.59 1.00 2055
## high_debt_versionfalse 2.40 0.59 1.28 3.61 1.00 3646
## Tail_ESS
## Intercept 2358
## high_debt_versionfalse 2980
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Print the varying intercept estimated for each session.
ranef(variable_names0)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 0.9593728 1.4598093 -1.4266878 4.35681025
## 6033d90a5af2c702367b3a96 1.2230461 1.3741480 -1.0700565 4.34180050
## 6034fc165af2c702367b3a98 -1.3461432 0.6400501 -2.6174202 -0.14643655
## 603500725af2c702367b3a99 -1.4742955 1.0719886 -3.6895763 0.53541888
## 603f97625af2c702367b3a9d 1.2794403 1.4101163 -0.9540715 4.59459200
## 603fd5d95af2c702367b3a9e -1.4598097 1.0828863 -3.6483898 0.62073472
## 60409b7b5af2c702367b3a9f 1.2307225 1.4401095 -1.0276302 4.64772975
## 604b82b5a7718fbed181b336 -0.8433473 0.8443749 -2.5238425 0.75539888
## 6050c1bf856f36729d2e5218 -1.8230547 1.0291442 -3.9423910 0.05089144
## 6050e1e7856f36729d2e5219 1.2152962 1.3938698 -1.0467375 4.47374750
## 6055fdc6856f36729d2e521b 0.9924102 1.4850145 -1.4927917 4.32935150
## 60589862856f36729d2e521f 1.0562901 1.5039300 -1.4083792 4.56801325
## 605afa3a856f36729d2e5222 -1.5634428 1.1462360 -3.8976315 0.50356810
## 605c8bc6856f36729d2e5223 -0.9095465 0.9745567 -2.8730900 0.95339948
## 605f3f2d856f36729d2e5224 0.9674901 1.4864794 -1.4442637 4.38204675
## 605f46c3856f36729d2e5225 -1.6011170 0.8800919 -3.4298875 0.05894927
## 60605337856f36729d2e5226 0.9504888 1.4504408 -1.4196987 4.34745700
## 60609ae6856f36729d2e5228 1.2287439 1.3950698 -0.9455645 4.55586525
## 6061ce91856f36729d2e522e 1.2291203 1.4048307 -1.0173040 4.51340125
## 6061f106856f36729d2e5231 -1.4715682 1.0739459 -3.6457168 0.59643020
## 6068ea9f856f36729d2e523e 1.5684952 1.3439166 -0.4693869 4.83278950
## 6075ab05856f36729d2e5247 1.2781836 1.4038392 -0.9955097 4.59275500
# Diagnostic plots for variable_names0, drawn without an interactive prompt.
plot(variable_names0, ask = FALSE)
# Posterior predictive check drawn as bars, based on 200 posterior samples.
pp_check(variable_names0, nsamples = 200, type = "bars")
We select the best performing model with one variable.
# Candidate model 1: good new variable names out of all new variable names
# (binomial outcome) explained by debt version and scenario, with a varying
# intercept per session.
variable_names1 <- brm(
"var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + scenario + (1 | session)",
prior = c(
# One shared prior for every population-level slope.
prior(normal(0, 1), class = "b"),
prior(normal(2, 1), class = "Intercept"),
# Prior on the between-session standard deviation.
prior(exponential(1), class = "sd")
),
family = binomial(),
data = d.both_completed,
control = list(adapt_delta = 0.95),
# The fit is stored under fits/ and only refitted when the model changes.
file = "fits/variable_names1",
file_refit = "on_change",
# Fixed seed so the sampling is reproducible.
seed = 20210421
)
# Print the estimates and convergence diagnostics of the fit.
summary(variable_names1)
## Family: binomial
## Links: mu = logit
## Formula: var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + scenario + (1 | session)
## Data: d.both_completed (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.78 0.63 0.74 3.20 1.00 1475 2333
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 1.74 0.55 0.77 2.94 1.00 2253
## high_debt_versionfalse 2.42 0.56 1.34 3.57 1.00 4049
## scenariotickets -0.64 0.53 -1.69 0.38 1.00 4095
## Tail_ESS
## Intercept 2734
## high_debt_versionfalse 2752
## scenariotickets 2543
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Per-session group-level intercept estimates of variable_names1.
ranef(variable_names1)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 1.093066 1.4632462 -1.3590025 4.41794275
## 6033d90a5af2c702367b3a96 1.199225 1.4948853 -1.1250630 4.53985300
## 6034fc165af2c702367b3a98 -1.570729 0.6830682 -3.0035820 -0.33337780
## 603500725af2c702367b3a99 -1.409009 1.1044456 -3.6444140 0.66224548
## 603f97625af2c702367b3a9d 1.248668 1.4289584 -1.0143170 4.49822175
## 603fd5d95af2c702367b3a9e -1.444694 1.1540950 -3.8164280 0.72340865
## 60409b7b5af2c702367b3a9f 1.193507 1.4823702 -1.0945442 4.71445900
## 604b82b5a7718fbed181b336 -1.025820 0.8695539 -2.8164268 0.60527305
## 6050c1bf856f36729d2e5218 -1.760403 1.0587253 -3.9362907 0.20798768
## 6050e1e7856f36729d2e5219 1.235263 1.4966840 -1.1258432 4.71174075
## 6055fdc6856f36729d2e521b 1.127720 1.4940310 -1.3266358 4.66202900
## 60589862856f36729d2e521f 1.162000 1.4871669 -1.2509405 4.72933225
## 605afa3a856f36729d2e5222 -1.517017 1.1826029 -3.9205807 0.73074608
## 605c8bc6856f36729d2e5223 -1.027399 0.9818350 -3.0101878 0.83709883
## 605f3f2d856f36729d2e5224 1.127413 1.4812606 -1.2892175 4.65922200
## 605f46c3856f36729d2e5225 -1.726124 0.8997469 -3.5510050 -0.05501024
## 60605337856f36729d2e5226 1.123126 1.4774671 -1.3124718 4.61499750
## 60609ae6856f36729d2e5228 1.225445 1.4894106 -1.1406840 4.75353650
## 6061ce91856f36729d2e522e 1.215768 1.4866867 -1.1666405 4.67304150
## 6061f106856f36729d2e5231 -1.431271 1.0992919 -3.6732065 0.62742388
## 6068ea9f856f36729d2e523e 1.606133 1.3889929 -0.5727873 4.87182500
## 6075ab05856f36729d2e5247 1.193832 1.4749458 -1.1719053 4.61740875
# Diagnostic plots for variable_names1, drawn without an interactive prompt.
plot(variable_names1, ask = FALSE)
# Posterior predictive check drawn as bars, based on 200 posterior samples.
pp_check(variable_names1, nsamples = 200, type = "bars")
We select the best performing model with two variables.
# Candidate model 2: same structure as candidate 1 with the standardised Java
# work experience (work_experience_java.s) added as a further predictor.
variable_names2 <- brm(
"var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + scenario + work_experience_java.s + (1 | session)",
prior = c(
# One shared prior for every population-level slope.
prior(normal(0, 1), class = "b"),
prior(normal(2, 1), class = "Intercept"),
# Prior on the between-session standard deviation.
prior(exponential(1), class = "sd")
),
family = binomial(),
data = d.both_completed,
control = list(adapt_delta = 0.95),
# The fit is stored under fits/ and only refitted when the model changes.
file = "fits/variable_names2",
file_refit = "on_change",
# Fixed seed so the sampling is reproducible.
seed = 20210421
)
# Print the estimates and convergence diagnostics of the fit.
summary(variable_names2)
## Family: binomial
## Links: mu = logit
## Formula: var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + scenario + work_experience_java.s + (1 | session)
## Data: d.both_completed (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.87 0.65 0.84 3.38 1.00 1156 2050
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 1.81 0.57 0.79 3.00 1.00 2223
## high_debt_versionfalse 2.42 0.56 1.33 3.56 1.00 3898
## scenariotickets -0.68 0.54 -1.75 0.38 1.00 4098
## work_experience_java.s 0.19 0.52 -0.83 1.25 1.00 2292
## Tail_ESS
## Intercept 2688
## high_debt_versionfalse 2983
## scenariotickets 2827
## work_experience_java.s 2695
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Per-session group-level intercept estimates of variable_names2.
ranef(variable_names2)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 1.255241 1.6053544 -1.3900780 5.159761750
## 6033d90a5af2c702367b3a96 1.294544 1.5418674 -1.2093457 4.782044750
## 6034fc165af2c702367b3a98 -1.556082 0.7137566 -2.9827595 -0.221446625
## 603500725af2c702367b3a99 -1.430599 1.1381464 -3.7108325 0.695586375
## 603f97625af2c702367b3a9d 1.369281 1.4948888 -1.0121733 4.841596000
## 603fd5d95af2c702367b3a9e -1.404366 1.1495959 -3.7413395 0.733319100
## 60409b7b5af2c702367b3a9f 1.296609 1.5265936 -1.1419105 4.965830250
## 604b82b5a7718fbed181b336 -1.030896 0.8844133 -2.7825160 0.656808625
## 6050c1bf856f36729d2e5218 -1.795700 1.0990693 -4.0444755 0.250461950
## 6050e1e7856f36729d2e5219 1.282235 1.5265490 -1.1986235 4.830863000
## 6055fdc6856f36729d2e521b 1.226939 1.5587861 -1.2501895 4.858555500
## 60589862856f36729d2e521f 1.124807 1.5741603 -1.5739028 4.689689000
## 605afa3a856f36729d2e5222 -1.779118 1.3643354 -4.6536703 0.687837200
## 605c8bc6856f36729d2e5223 -1.191728 1.0574864 -3.2684370 0.881749075
## 605f3f2d856f36729d2e5224 1.057338 1.8417415 -2.1496275 5.221295000
## 605f46c3856f36729d2e5225 -1.748655 0.9338799 -3.6239415 0.002337589
## 60605337856f36729d2e5226 1.209346 1.6016507 -1.3611807 4.915383750
## 60609ae6856f36729d2e5228 1.286541 1.5000872 -1.1147158 4.710303500
## 6061ce91856f36729d2e522e 1.296802 1.5222352 -1.1925990 4.764905750
## 6061f106856f36729d2e5231 -1.414230 1.1667625 -3.8232542 0.782362025
## 6068ea9f856f36729d2e523e 1.662751 1.4977089 -0.6433186 5.295703000
## 6075ab05856f36729d2e5247 1.305361 1.5232928 -1.1335567 4.860366750
# Diagnostic plots for variable_names2, drawn without an interactive prompt.
plot(variable_names2, ask = FALSE)
# Posterior predictive check drawn as bars, based on 200 posterior samples.
pp_check(variable_names2, nsamples = 200, type = "bars")
We select the best performing model with one variable.
# Candidate model 3.
# NOTE(review): the formula below is identical to the one used for
# variable_names2 (debt version, scenario, work_experience_java.s), so this fit
# reproduces candidate 2 under a different cache file. Confirm whether a third
# predictor was meant to be added here.
variable_names3 <- brm(
"var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + scenario + work_experience_java.s + (1 | session)",
prior = c(
# One shared prior for every population-level slope.
prior(normal(0, 1), class = "b"),
prior(normal(2, 1), class = "Intercept"),
# Prior on the between-session standard deviation.
prior(exponential(1), class = "sd")
),
family = binomial(),
data = d.both_completed,
control = list(adapt_delta = 0.95),
# The fit is stored under fits/ and only refitted when the model changes.
file = "fits/variable_names3",
file_refit = "on_change",
# Fixed seed so the sampling is reproducible.
seed = 20210421
)
# Print the estimates and convergence diagnostics of the fit.
summary(variable_names3)
## Family: binomial
## Links: mu = logit
## Formula: var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + scenario + work_experience_java.s + (1 | session)
## Data: d.both_completed (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.87 0.65 0.84 3.38 1.00 1156 2050
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 1.81 0.57 0.79 3.00 1.00 2223
## high_debt_versionfalse 2.42 0.56 1.33 3.56 1.00 3898
## scenariotickets -0.68 0.54 -1.75 0.38 1.00 4098
## work_experience_java.s 0.19 0.52 -0.83 1.25 1.00 2292
## Tail_ESS
## Intercept 2688
## high_debt_versionfalse 2983
## scenariotickets 2827
## work_experience_java.s 2695
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Per-session group-level intercept estimates of variable_names3.
ranef(variable_names3)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 1.255241 1.6053544 -1.3900780 5.159761750
## 6033d90a5af2c702367b3a96 1.294544 1.5418674 -1.2093457 4.782044750
## 6034fc165af2c702367b3a98 -1.556082 0.7137566 -2.9827595 -0.221446625
## 603500725af2c702367b3a99 -1.430599 1.1381464 -3.7108325 0.695586375
## 603f97625af2c702367b3a9d 1.369281 1.4948888 -1.0121733 4.841596000
## 603fd5d95af2c702367b3a9e -1.404366 1.1495959 -3.7413395 0.733319100
## 60409b7b5af2c702367b3a9f 1.296609 1.5265936 -1.1419105 4.965830250
## 604b82b5a7718fbed181b336 -1.030896 0.8844133 -2.7825160 0.656808625
## 6050c1bf856f36729d2e5218 -1.795700 1.0990693 -4.0444755 0.250461950
## 6050e1e7856f36729d2e5219 1.282235 1.5265490 -1.1986235 4.830863000
## 6055fdc6856f36729d2e521b 1.226939 1.5587861 -1.2501895 4.858555500
## 60589862856f36729d2e521f 1.124807 1.5741603 -1.5739028 4.689689000
## 605afa3a856f36729d2e5222 -1.779118 1.3643354 -4.6536703 0.687837200
## 605c8bc6856f36729d2e5223 -1.191728 1.0574864 -3.2684370 0.881749075
## 605f3f2d856f36729d2e5224 1.057338 1.8417415 -2.1496275 5.221295000
## 605f46c3856f36729d2e5225 -1.748655 0.9338799 -3.6239415 0.002337589
## 60605337856f36729d2e5226 1.209346 1.6016507 -1.3611807 4.915383750
## 60609ae6856f36729d2e5228 1.286541 1.5000872 -1.1147158 4.710303500
## 6061ce91856f36729d2e522e 1.296802 1.5222352 -1.1925990 4.764905750
## 6061f106856f36729d2e5231 -1.414230 1.1667625 -3.8232542 0.782362025
## 6068ea9f856f36729d2e523e 1.662751 1.4977089 -0.6433186 5.295703000
## 6075ab05856f36729d2e5247 1.305361 1.5232928 -1.1335567 4.860366750
# Diagnostic plots for variable_names3, drawn without an interactive prompt.
plot(variable_names3, ask = FALSE)
# Posterior predictive check for the same model that is plotted above
# (variable_names3), drawn as bars from 200 posterior samples.
pp_check(variable_names3, nsamples = 200, type = "bars")
All candidate models look nice and none is significantly better than the others, so we will proceed with the simplest model: variable_names0
We will try a few different variations of the selected candidate model.
Some participants only completed one scenario. Those have been excluded from the initial dataset to improve sampling of the models. We do, however, want to use all the data we can and will therefore try to fit the model with the complete dataset.
Some participants only completed one scenario. Those have been excluded from the initial dataset to improve sampling of the models. We do, however, want to use all the data we can and will therefore try to fit the model with the complete dataset.
# Selected candidate (debt version + varying intercept per session) refitted on
# d.completed instead of d.both_completed, i.e. on the complete dataset.
variable_names0.all <- brm(
"var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + (1 | session)",
prior = c(
# One shared prior for every population-level slope.
prior(normal(0, 1), class = "b"),
prior(normal(2, 1), class = "Intercept"),
# Prior on the between-session standard deviation.
prior(exponential(1), class = "sd")
),
family = binomial(),
# d.completed is converted to a plain data frame before it is passed to brm.
data = as.data.frame(d.completed),
control = list(adapt_delta = 0.95),
# The fit is stored under fits/ and only refitted when the model changes.
file = "fits/variable_names0.all",
file_refit = "on_change",
# Fixed seed so the sampling is reproducible.
seed = 20210421
)
# Print the estimates and convergence diagnostics of the fit.
summary(variable_names0.all)
## Family: binomial
## Links: mu = logit
## Formula: var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + (1 | session)
## Data: as.data.frame(d.completed) (Number of observations: 51)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 29)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.68 0.56 0.73 2.88 1.00 1366 1947
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 1.48 0.47 0.64 2.48 1.00 2471
## high_debt_versionfalse 2.48 0.56 1.38 3.63 1.00 4459
## Tail_ESS
## Intercept 2924
## high_debt_versionfalse 3101
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Per-session group-level intercept estimates of variable_names0.all.
ranef(variable_names0.all)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033c6fc5af2c702367b3a93 -1.3869510 1.1083804 -3.8059562 0.640411375
## 6033d69a5af2c702367b3a95 0.9464261 1.4285258 -1.3969613 4.269342250
## 6033d90a5af2c702367b3a96 1.2207704 1.4042311 -1.0971247 4.478865000
## 6034fc165af2c702367b3a98 -1.3309234 0.6231877 -2.5818770 -0.161306950
## 603500725af2c702367b3a99 -1.4661002 1.0549079 -3.6853402 0.487411200
## 603f84f15af2c702367b3a9b 0.2402520 1.6041079 -2.6659903 3.716927500
## 603f97625af2c702367b3a9d 1.2332757 1.3681069 -0.9525377 4.445348250
## 603fd5d95af2c702367b3a9e -1.4621612 1.0503431 -3.6188418 0.455923700
## 60409b7b5af2c702367b3a9f 1.1755969 1.3813356 -1.0581940 4.415432500
## 604b82b5a7718fbed181b336 -0.8481773 0.8143013 -2.4817405 0.734885900
## 604f1239a7718fbed181b33f 0.8292040 1.4489824 -1.6342780 4.167540750
## 6050c1bf856f36729d2e5218 -1.8170189 0.9803604 -3.8701228 0.001106741
## 6050e1e7856f36729d2e5219 1.2178405 1.3740012 -1.0049792 4.283213500
## 6055fdc6856f36729d2e521b 0.9593235 1.3948570 -1.3593255 4.085696000
## 60579f2a856f36729d2e521e 0.2664243 1.6512415 -2.8754408 3.754241250
## 60589862856f36729d2e521f 0.9479557 1.3618734 -1.2958543 4.018388750
## 605a30a7856f36729d2e5221 0.1532920 1.7078751 -3.1326363 3.890949750
## 605afa3a856f36729d2e5222 -1.5711265 1.0827390 -3.8075067 0.384341750
## 605c8bc6856f36729d2e5223 -0.8891101 0.9675897 -2.7592045 0.983294425
## 605f3f2d856f36729d2e5224 0.9129801 1.4034211 -1.4173490 4.173405250
## 605f46c3856f36729d2e5225 -1.5649874 0.8896083 -3.4256675 0.028091653
## 60605337856f36729d2e5226 0.9038988 1.3661650 -1.3900227 4.107031500
## 60609ae6856f36729d2e5228 1.2167153 1.3845780 -0.9594529 4.446352750
## 6061ce91856f36729d2e522e 1.2286107 1.3928314 -1.0167965 4.369936750
## 6061f106856f36729d2e5231 -1.4675347 1.0699160 -3.6145143 0.529039150
## 60672faa856f36729d2e523c 0.2738112 1.5961321 -2.5484403 3.872159750
## 6068ea9f856f36729d2e523e 1.5420235 1.3225310 -0.5237006 4.541578250
## 606db69d856f36729d2e5243 0.4964141 1.5464307 -2.2223970 3.969827250
## 6075ab05856f36729d2e5247 1.1906455 1.3682642 -1.0126700 4.445376000
# Diagnostic plots for variable_names0.all, drawn without an interactive prompt.
plot(variable_names0.all, ask = FALSE)
# Posterior predictive check drawn as bars, based on 200 posterior samples.
pp_check(variable_names0.all, nsamples = 200, type = "bars")
As including all data points didn’t harm the model we will create this variant with all data points as well.
This variation includes the work_experience_programming.s predictor, as it can give further insight into how experience plays a factor in the effect we try to measure. This is especially important as our sampling was skewed towards containing less experienced developers than the population at large.
# Variant of variable_names0.all that adds the standardised professional
# programming experience (work_experience_programming.s) as a predictor.
variable_names0.all.exp <- brm(
"var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + work_experience_programming.s + (1 | session)",
prior = c(
# One shared prior for every population-level slope.
prior(normal(0, 1), class = "b"),
prior(normal(2, 1), class = "Intercept"),
# Prior on the between-session standard deviation.
prior(exponential(1), class = "sd")
),
family = binomial(),
# d.completed is converted to a plain data frame before it is passed to brm.
data = as.data.frame(d.completed),
control = list(adapt_delta = 0.95),
# The fit is stored under fits/ and only refitted when the model changes.
file = "fits/variable_names0.all.exp",
file_refit = "on_change",
# Fixed seed so the sampling is reproducible.
seed = 20210421
)
# NOTE(review): this summarises variable_names0.all (the model without the
# experience predictor), not the model fitted just above; the summary of
# variable_names0.all.exp follows separately. Confirm the repeat is intended.
summary(variable_names0.all)
## Family: binomial
## Links: mu = logit
## Formula: var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + (1 | session)
## Data: as.data.frame(d.completed) (Number of observations: 51)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 29)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.68 0.56 0.73 2.88 1.00 1366 1947
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 1.48 0.47 0.64 2.48 1.00 2471
## high_debt_versionfalse 2.48 0.56 1.38 3.63 1.00 4459
## Tail_ESS
## Intercept 2924
## high_debt_versionfalse 3101
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Print the estimates and convergence diagnostics of the experience variant.
summary(variable_names0.all.exp)
## Family: binomial
## Links: mu = logit
## Formula: var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + work_experience_programming.s + (1 | session)
## Data: as.data.frame(d.completed) (Number of observations: 51)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 29)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.76 0.57 0.81 3.07 1.00 1365 1777
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept 1.53 0.49 0.68 2.56 1.00
## high_debt_versionfalse 2.50 0.57 1.40 3.66 1.00
## work_experience_programming.s 0.15 0.47 -0.74 1.11 1.00
## Bulk_ESS Tail_ESS
## Intercept 1902 2602
## high_debt_versionfalse 3610 2859
## work_experience_programming.s 2396 2518
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Per-session group-level intercept estimates of variable_names0.all.exp.
ranef(variable_names0.all.exp)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033c6fc5af2c702367b3a93 -1.4182361 1.1827451 -3.9014715 0.7405075250
## 6033d69a5af2c702367b3a95 1.0248475 1.4662706 -1.4678295 4.3656542500
## 6033d90a5af2c702367b3a96 1.3181878 1.4271892 -0.9734192 4.5846872500
## 6034fc165af2c702367b3a98 -1.3412419 0.6487407 -2.6612825 -0.0874654975
## 603500725af2c702367b3a99 -1.4686652 1.0913363 -3.6888770 0.5774463500
## 603f84f15af2c702367b3a9b 0.2668357 1.6400445 -2.8219980 3.7210642500
## 603f97625af2c702367b3a9d 1.3115338 1.3942632 -0.9248167 4.4947100000
## 603fd5d95af2c702367b3a9e -1.4897272 1.0749163 -3.7007410 0.5424600250
## 60409b7b5af2c702367b3a9f 1.2703161 1.4355589 -1.0773738 4.5062062500
## 604b82b5a7718fbed181b336 -0.8523828 0.8496030 -2.5182638 0.7348042750
## 604f1239a7718fbed181b33f 0.9114816 1.4889058 -1.5963553 4.3514157500
## 6050c1bf856f36729d2e5218 -1.9168515 1.0339995 -4.0199970 -0.0009028132
## 6050e1e7856f36729d2e5219 1.2747172 1.4296471 -1.0629927 4.6702740000
## 6055fdc6856f36729d2e521b 1.0303782 1.4959962 -1.5193685 4.3312645000
## 60579f2a856f36729d2e521e 0.2855757 1.6437444 -2.7615500 3.7951775000
## 60589862856f36729d2e521f 0.9694355 1.5701748 -1.7895887 4.4170072500
## 605a30a7856f36729d2e5221 0.1449444 1.7374905 -3.1083622 3.8912440000
## 605afa3a856f36729d2e5222 -1.8136133 1.2904502 -4.5523393 0.4529618500
## 605c8bc6856f36729d2e5223 -0.9854226 1.0173308 -3.0414090 0.9667453250
## 605f3f2d856f36729d2e5224 0.9172891 1.6122785 -2.1206710 4.4149135000
## 605f46c3856f36729d2e5225 -1.5721425 0.8818168 -3.3500108 0.0843726225
## 60605337856f36729d2e5226 1.0181315 1.4778241 -1.4552510 4.3759702500
## 60609ae6856f36729d2e5228 1.2786818 1.4149702 -0.9782539 4.5644302500
## 6061ce91856f36729d2e522e 1.3037518 1.4168388 -1.0236575 4.6591297500
## 6061f106856f36729d2e5231 -1.4808466 1.0753831 -3.5911405 0.5323328000
## 60672faa856f36729d2e523c 0.2831757 1.6193232 -2.6897402 3.6969037500
## 6068ea9f856f36729d2e523e 1.5953833 1.3507665 -0.4696562 4.6847365000
## 606db69d856f36729d2e5243 0.4684324 1.5947690 -2.4460730 4.0236922500
## 6075ab05856f36729d2e5247 1.3106881 1.4807536 -1.0039080 4.7142510000
# Diagnostic plots for variable_names0.all.exp, drawn without an interactive prompt.
plot(variable_names0.all.exp, ask = FALSE)
# Posterior predictive check drawn as bars, based on 200 posterior samples.
pp_check(variable_names0.all.exp, nsamples = 200, type = "bars")
# Leave-one-out comparison of the model without and with the experience
# predictor, both fitted on the complete dataset.
# NOTE(review): the printed diagnostics report Pareto k values above 0.7 for
# both models, so the elpd estimates should be read with caution.
loo(variable_names0.all, variable_names0.all.exp)
## Output of model 'variable_names0.all':
##
## Computed from 4000 by 51 log-likelihood matrix
##
## Estimate SE
## elpd_loo -38.6 6.3
## p_loo 15.0 2.8
## looic 77.2 12.5
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 24 47.1% 1056
## (0.5, 0.7] (ok) 17 33.3% 217
## (0.7, 1] (bad) 10 19.6% 82
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'variable_names0.all.exp':
##
## Computed from 4000 by 51 log-likelihood matrix
##
## Estimate SE
## elpd_loo -39.5 6.4
## p_loo 16.3 3.0
## looic 79.1 12.9
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 22 43.1% 1097
## (0.5, 0.7] (ok) 15 29.4% 272
## (0.7, 1] (bad) 14 27.5% 48
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## Model comparisons:
## elpd_diff se_diff
## variable_names0.all 0.0 0.0
## variable_names0.all.exp -0.9 0.4
This means that our final model, with all data points and experience predictors, is variable_names0.all.exp
To begin interpreting the model we look at how its parameters were estimated. As our research is focused on how the outcome of the model is affected we will mainly analyze the \(\beta\) parameters.
# Posterior densities of the two population-level slopes of the final model.
mcmc_areas(
  variable_names0.all.exp,
  pars = c("b_high_debt_versionfalse", "b_work_experience_programming.s"),
  prob = 0.95
) +
  # A single y scale is enough: a second scale for the same aesthetic replaces
  # the first, so the earlier bare scale_y_discrete() had no effect.
  # NOTE(review): the labels are matched to the parameters by position; confirm
  # the order against the rendered plot.
  scale_y_discrete(labels = c("High debt version: false", "Professional programming experience")) +
  # The plotted fit is the variable naming model, so the title names it.
  ggtitle(
    "Beta parameters densities in variable naming model",
    subtitle = "Shaded region marks 95% of the density. Line marks the median"
  )
We start by extracting posterior samples
# Standardise years of programming experience: subtract the mean and divide by
# the standard deviation of d.completed$work_experience_programming.
scale_programming_experience <- function(x) {
  experience <- d.completed$work_experience_programming
  centred <- x - mean(experience)
  centred / sd(experience)
}
# Inverse of scale_programming_experience(): map a standardised value back to
# years using the same mean and standard deviation.
unscale_programming_experience <- function(x) {
  experience <- d.completed$work_experience_programming
  rescaled <- x * sd(experience)
  rescaled + mean(experience)
}
# Prediction grid: both debt versions crossed with 0, 3, 10, 25 and 40 years of
# programming experience (standardised), no specific session, and 1000 trials
# per prediction.
post_settings <- expand.grid(
  high_debt_version = c("false", "true"),
  session = NA,
  var_names_new_all = 1000,
  # The scaling helper is vectorised, so the whole vector is converted at once.
  work_experience_programming.s = scale_programming_experience(c(0, 3, 10, 25, 40))
)
# Posterior predictions in long format, one row per draw and grid row, joined
# back to the settings that produced them.
post <- variable_names0.all.exp %>%
  posterior_predict(newdata = post_settings) %>%
  melt(value.name = "estimate", varnames = c("sample_number", "settings_id")) %>%
  left_join(
    rowid_to_column(post_settings, var = "settings_id"),
    by = "settings_id"
  ) %>%
  mutate(
    # Back to years of experience for plotting.
    work_experience_programming = unscale_programming_experience(work_experience_programming.s),
    # Predicted count out of 1000 trials expressed as a rate.
    estimate = estimate / 1000
  ) %>%
  select(estimate, high_debt_version, work_experience_programming)
# Density of the predicted rate of good variable names for a developer with
# 10 years of experience, split by debt version.
ggplot(
  # work_experience_programming comes from a scale -> unscale round trip, so it
  # may differ from 10 by floating-point error; near() compares with a
  # tolerance instead of exact equality, which could drop every row.
  post %>% filter(near(work_experience_programming, 10)),
  aes(x = estimate, fill = high_debt_version)
) +
  geom_density(alpha = 0.5) +
  scale_fill_manual(
    name = "Debt version",
    labels = c("Low debt", "High debt"),
    values = c("lightblue", "darkblue")
  ) +
  facet_grid(rows = vars(work_experience_programming)) +
  labs(
    title = "Rate of good variable naming",
    x = "Rate",
    y = "Density"
  )
# Density of the predicted rate of good variable names by debt version, with
# one facet row per experience level in the prediction grid.
post %>%
  ggplot(aes(x = estimate, fill = high_debt_version)) +
  geom_density(alpha = 0.5) +
  facet_grid(rows = vars(work_experience_programming)) +
  scale_fill_manual(
    name = "Debt version",
    labels = c("Low debt", "High debt"),
    values = c("lightblue", "darkblue")
  ) +
  labs(
    title = "Rate of good variable naming",
    x = "Rate",
    y = "Density"
  )
# Standardise years of programming experience: subtract the mean and divide by
# the standard deviation of d.completed$work_experience_programming.
scale_programming_experience <- function(x) {
  experience <- d.completed$work_experience_programming
  centred <- x - mean(experience)
  centred / sd(experience)
}
# Inverse of scale_programming_experience(): map a standardised value back to
# years using the same mean and standard deviation.
unscale_programming_experience <- function(x) {
  experience <- d.completed$work_experience_programming
  rescaled <- x * sd(experience)
  rescaled + mean(experience)
}
# Prediction grid: both debt versions for a developer with 10 years of
# programming experience (standardised), no specific session, 10 named
# variables per prediction.
post_settings <- expand.grid(
  high_debt_version = c("false", "true"),
  session = NA,
  var_names_new_all = 10,
  work_experience_programming.s = scale_programming_experience(10)
)
# Posterior predictions in long format, joined back to their settings; the
# estimate stays a count of good names out of 10.
post <- variable_names0.all.exp %>%
  posterior_predict(newdata = post_settings) %>%
  melt(value.name = "estimate", varnames = c("sample_number", "settings_id")) %>%
  left_join(
    rowid_to_column(post_settings, var = "settings_id"),
    by = "settings_id"
  ) %>%
  mutate(
    work_experience_programming = unscale_programming_experience(work_experience_programming.s)
  ) %>%
  select(estimate, high_debt_version, work_experience_programming)
# Relabel the factor levels ("false", "true") for display.
levels(post$high_debt_version) <- c("Low debt version", "High debt version")
# Number of posterior draws behind each facet (one facet per debt version).
# The y-axis percentages are derived from it: the fixed breaks 500, 1000, ...
# only equal 10%, 20%, ... with 5000 draws, while the fits report 4000.
draws_per_version <- nrow(post) / length(unique(post$high_debt_version))
percent_breaks <- seq(0.1, 0.5, by = 0.1)
# Distribution of the predicted number of good variable names out of 10,
# one facet per debt version.
ggplot(post, aes(x = estimate, fill = high_debt_version)) +
  geom_bar() +
  facet_grid(rows = vars(high_debt_version)) +
  scale_fill_manual(
    name = "Debt version",
    labels = c("Low debt version", "High debt version"),
    values = c("lightblue", "darkblue")
  ) +
  labs(
    title = "Variable naming (10 named variables)",
    x = "Number of good variable names",
    y = "Rate of occurrence"
  ) +
  theme_minimal() +
  scale_x_continuous(breaks = 0:10, labels = 0:10) +
  scale_y_continuous(
    limits = NULL,
    # Counts that correspond to 10%..50% of the draws in a facet.
    breaks = percent_breaks * draws_per_version,
    labels = c("10%", "20%", "30%", "40%", "50%")
  ) +
  # "none" is the documented value for hiding the legend.
  theme(legend.position = "none")
We can also plot the difference between good variable names for the high debt version and the low debt version.